%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import cm
import ipyparallel as ipp
from time import time
from datetime import datetime
import motif as mf
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.decomposition import PCA
from sklearn.utils import shuffle
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate, RepeatedKFold
from scipy.stats import spearmanr
from scipy.stats import pearsonr
Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)
### set parameters for the motif analysis
PROTEIN_NAME='T7 primase'
PROT_CONC=5 # free protein concentration at binding reation; PBM typically 0.1 and RNACompete typically 0.002
BOTH_STRANDS=False # wheter both strands are present for binding; True if double-stranded DNA or RNA is used as probes
STAGES=mf.stage(protein=PROTEIN_NAME)
### read data
dfprobes_raw=pd.read_csv('./data/T7/chip_B_favor.csv', sep=';')
print('Columns of imported Data File: %s'%dfprobes_raw.columns)
Columns of imported Data File: Index(['0_nuc', '1_nuc', '2_nuc', '3_nuc', '4_nuc', '5_nuc', '6_nuc', '7_nuc',
'8_nuc', '9_nuc', '10_nuc', '11_nuc', '12_nuc', '13_nuc', '14_nuc',
'15_nuc', '16_nuc', '17_nuc', '18_nuc', '19_nuc', '20_nuc', '21_nuc',
'22_nuc', '23_nuc', '24_nuc', '25_nuc', '26_nuc', '27_nuc', '28_nuc',
'29_nuc', '30_nuc', '31_nuc', '32_nuc', '33_nuc', '34_nuc', '35_nuc',
'prim_only_score', 'prim', 'poly', 'seq'],
dtype='object')
### select columns for probe sequence and signal
column_sequence='seq'
column_signal='prim_only_score'
#background_signal='mean_background_intensity'
background_signal=None #set to None if not needed
#basic preprocessing
dfprobes_raw[column_signal]=dfprobes_raw[column_signal].apply(lambda a: np.NaN if a==' ' else a)
dfprobes_raw[column_sequence]=dfprobes_raw[column_sequence].apply(lambda a: np.NaN if a=='nan' else a)
dfprobes_raw=dfprobes_raw.dropna()
#construct new dataframe with only necessary data
if type(background_signal)==type(None):
dfprobes=pd.DataFrame({'seq':dfprobes_raw[column_sequence].astype(str),
'signal binding': dfprobes_raw[column_signal].astype(np.float32)}) #rebuild dataframe
else:
dfprobes=pd.DataFrame({'seq':dfprobes_raw[column_sequence].astype(str),
'signal': dfprobes_raw[column_signal].astype(np.float32),
'background':dfprobes_raw[background_signal].astype(np.float32)}) #rebuild dataframe
dfprobes['signal binding']=dfprobes['signal']-dfprobes['background']
# display main properties of data set
dfprobes['signal binding']=dfprobes['signal binding']**2 # as raw data was squared
dfprobes['signal binding'].plot(figsize=(15,5))
dfprobes.describe()
### check type of nucleic acid
dfprobes['seq']=dfprobes['seq'].apply(lambda seq: seq.upper().replace(" ", "")) #upper and remove blanks
dfprobes['RNA']=dfprobes['seq'].apply(lambda seq: all(char in 'ACGU' for char in seq))
dfprobes['DNA']=dfprobes['seq'].apply(lambda seq: all(char in 'ACGT' for char in seq))
non_RNA_counts=len(dfprobes[dfprobes['RNA']==False])
non_DNA_counts=len(dfprobes[dfprobes['DNA']==False])
if non_RNA_counts<non_DNA_counts:
NUC_TYPE='RNA'; print('I: RNA probes detected!')
else: NUC_TYPE='DNA'; print('I: DNA probes detected!')
if NUC_TYPE=='RNA' and non_RNA_counts!=0:
print('E: The probe sequences appear to be RNA, however there are some non-RNA nucleotides in the sequences.')
print('E: Please check the following sequnces %s'%dfprobes[dfprobes['RNA']==False])
if NUC_TYPE=='DNA' and non_DNA_counts!=0:
print('E: The probe sequences appear to be RNA, however there are some non-RNA nucleotides in the sequences.')
print('E: Please check the following sequnces %s'%dfprobes[dfprobes['DNA']==False])
I: DNA probes detected!
### option to add a constant sequence at the 3' end and 5' end
sequence_to_be_added_5=''
sequence_to_be_added_3='' # standard PBM arrays: CCTGTGTGAAATTGTTATCCGCTCT T7 array: GTCTTGA..
dfprobes['seq']=sequence_to_be_added_5.upper()+dfprobes['seq']+sequence_to_be_added_3.upper()
print(f"I: The nucleotide sequences {sequence_to_be_added_5.upper()} has been added to the 5' end all probe sequences.")
print(f"I: The nucleotide sequences {sequence_to_be_added_3.upper()} has been added to the 3' end all probe sequences.")
I: The nucleotide sequences has been added to the 5' end all probe sequences. I: The nucleotide sequences has been added to the 3' end all probe sequences.
### egalize length
dfprobes['seq_length']=dfprobes['seq'].apply(len)
if max(dfprobes['seq_length'])!=min(dfprobes['seq_length']):
print('I: Probes length is not uniform, detected range: %i ..%i' %(min(dfprobes['seq_length']),max(dfprobes['seq_length'])))
max_length=max(dfprobes['seq_length'])
dfprobes['padded_sequence']=dfprobes['seq'].apply(lambda seq: seq+((max_length-len(seq))*'-'))
print("I: Probe sequences have been padded at the 5' to the uniform length of %i nucleotides" %max_length)
else:
print('I: Probe sequences have the uniform length of %i nucleotides' %(dfprobes['seq_length'].median()))
dfprobes['padded_sequence']=dfprobes['seq']
print('I: Total datasets contains %i sequences.' % len(dfprobes))
# visualize composition of each position
df_nucleotides=mf.split_sequence_in_nucleotides(dfprobes['padded_sequence'])
dfcount=pd.DataFrame(index=['A', 'C', 'G', 'T', 'U', '-'])
for column in df_nucleotides:
dfcount[column]=df_nucleotides[column].value_counts()
dfcount=dfcount.fillna(0) #zeros for NaN
dfcount.transpose().plot(figsize=(15,5), kind='bar')
print('I: Visualisation of the base composition per position')
print('I: If positions are invariant they can be removed before sequence analysis.')
I: Probe sequences have the uniform length of 36 nucleotides I: Total datasets contains 3149 sequences. I: Visualisation of the base composition per position I: If positions are invariant they can be removed before sequence analysis.
# You may remove invariant continuos positions by adjusting the slicing.
# It is recommended to leave a few invariant positions to allow for binding events
# between the variable and constant part of the probes.
dfprobes['padded_sequence']=dfprobes['padded_sequence'].apply(lambda s:s[:43]) ### <==== do the slicing here
# visualize composition of each position
print('I: Visualisation of the base composition per position after slicing.')
df_nucleotides=mf.split_sequence_in_nucleotides(dfprobes['padded_sequence'])
dfcount=pd.DataFrame(index=['A', 'C', 'G', 'T', 'U', '-'])
for column in df_nucleotides:
dfcount[column]=df_nucleotides[column].value_counts()
dfcount=dfcount.fillna(0) #zeros for NaN
dfcount.transpose().plot(figsize=(15,5), kind='bar')
plt.show()
### Preparation for later classification
mean=dfprobes['signal binding'].mean()
std=dfprobes['signal binding'].std()
THRESHOLD=mean+2*std
dfprobes['positive probe']=dfprobes['signal binding'].apply(lambda s: True if s>THRESHOLD else False)
print('I: The whole dataset has been used to set the threshold for a positive probe.')
print('T: The threshold is %f' %THRESHOLD)
print(f"I: {len(dfprobes[dfprobes['positive probe']])} probes of {len(dfprobes)} are above threshold.")
I: Visualisation of the base composition per position after slicing.
I: The whole dataset has been used to set the threshold for a positive probe. T: The threshold is 0.663948 I: 196 probes of 3149 are above threshold.
### Shuffle and prepare dataset for training and testing
# shuffle
dfprobes = shuffle(dfprobes)
# display histogramms of dataset
dfprobes['signal binding'].plot(kind='hist', bins=25).axvline(x=THRESHOLD, color='r', linestyle='-.', lw=0.5, label='threshold classification')
# complete data
X=mf.hotencode_sequence(dfprobes['padded_sequence'], nuc_type=NUC_TYPE)
y=np.array(dfprobes['signal binding'])
#### Perfrom GridCV Search for exploration of the motif length and other model parameters
# primary goal: identify the minimum motif length which gives a good r-value
# optional: allow also for global optimization to verify whether the local optimization is good enough
# optional: vary G0 to find a good regression model
# optional: fitG0=True. This option adjust G0, so that maximal binding is around 1 (prevents saturation of probes by multiple binding events)
model_grid=mf.findmotif(protein_conc=PROT_CONC, both_strands=BOTH_STRANDS, fit_G0=True)
# prepare grid search over parameters
param_grid = {"motif_length": [1,2,3,4,5,6]} # choose sensible range for length of motif and other parameters
# run grid search
grid_search = GridSearchCV(model_grid, param_grid=param_grid, verbose=2,
cv=RepeatedKFold(n_splits=5, n_repeats=10),
n_jobs=-1,
return_train_score=True)
start = time()
grid_search.fit(X, y)
df_grid=pd.DataFrame(grid_search.cv_results_)
print("I: GridSearchCV took %.2f hours for %d candidate parameter settings."
% ((time() - start)/3600, len(grid_search.cv_results_["params"])))
print('I: number of samples: %i' %len(X))
df_grid=pd.DataFrame(grid_search.cv_results_)
print('I: Plot of r vs motif length')
df_grid['r(train)']=df_grid['mean_train_score'].apply(np.sqrt)
df_grid['r(test)']=df_grid['mean_test_score'].apply(np.sqrt)
df_grid['std(train)']=df_grid['std_train_score'].apply(np.sqrt)
df_grid['std(test)']=df_grid['std_test_score'].apply(np.sqrt)
fig, ax=plt.subplots()
df_grid.plot(ax=ax,kind='scatter', x='param_motif_length', y='r(train)', yerr='std(train)', figsize=(5,3), c='blue').set_xticks(param_grid["motif_length"])
df_grid.plot(ax=ax,kind='scatter', x='param_motif_length', y='r(test)', yerr='std(test)', c='red')
ax.set_ylabel('r')
ax.legend(['train', 'test'])
plt.show()
df_grid[['param_motif_length','r(test)', 'r(train)']]
Fitting 50 folds for each of 6 candidates, totalling 300 fits I: GridSearchCV took 12.73 hours for 6 candidate parameter settings. I: number of samples: 3149 I: Plot of r vs motif length
| param_motif_length | r(test) | r(train) | |
|---|---|---|---|
| 0 | 1 | 0.177967 | 0.187764 |
| 1 | 2 | 0.663136 | 0.666127 |
| 2 | 3 | 0.676877 | 0.678723 |
| 3 | 4 | 0.669234 | 0.673278 |
| 4 | 5 | 0.697536 | 0.705088 |
| 5 | 6 | 0.696467 | 0.705967 |
%store df_grid
Stored 'df_grid' (DataFrame)
%store param_grid
Stored 'param_grid' (dict)
### run a number of identical optimizations with motif length found during grid search
### goal: find best motif through repetition, judge stabiltiy of optimization
CORE_MOTIF_LENGTH=4 # adjust core motif length if needed, motif length can be changed later
# prepare for ipyparallel
number_of_optimizations = 200
model_list = [mf.findmotif(motif_length=CORE_MOTIF_LENGTH, protein_conc=PROT_CONC, both_strands=BOTH_STRANDS, fit_G0=True)] * number_of_optimizations
X_list = [X] * number_of_optimizations
y_list = [y] * number_of_optimizations
def single_job(model, X, y):
model.fit(X, y)
return {'model':model}
# run the optimizations on ipp.cluster
start = time()
with ipp.Cluster(log_level=40) as rc:
rc[:].use_pickle()
view = rc.load_balanced_view()
asyncresult = view.map_async(single_job, model_list, X_list, y_list)
asyncresult.wait_interactive()
result = asyncresult.get()
print("I: Optimization took %.2f hours." % ((time() - start) / 3600))
# assemble results and analyze
df_repetitions=pd.DataFrame(result)
df_repetitions['r']=df_repetitions['model'].apply(lambda e: mf.linregress(e.predict(X),y).rvalue)
df_repetitions['G0']=df_repetitions['model'].apply(lambda e: e.finalG0_)
df_repetitions['max binding']=df_repetitions['model'].apply(lambda e: e.max_binding_fit)
df_repetitions['min binding']=df_repetitions['model'].apply(lambda e: e.min_binding_fit)
df_repetitions['ratio'] = df_repetitions['model'].apply(lambda e: e.ratio)
df_repetitions['energies']=df_repetitions['model'].apply(lambda e: e.energies_)
df_repetitions['information']=df_repetitions['model'].apply(lambda e: mf.energies2information(e.energies_))
# display results of the ensemble of optimizations
print('I: Results of the repeated motif finding, sorted according to the regression coefficient')
df_repetitions.sort_values('r', ascending=False, inplace=True)
mf.display_df(df_repetitions, nuc_type=NUC_TYPE)
0%| | 0/16 [00:00<?, ?engine/s]
single_job: 0%| | 0/200 [00:00<?, ?tasks/s]
I: Optimization took 9.36 hours. I: Results of the repeated motif finding, sorted according to the regression coefficient
| model | r | G0 | max binding | min binding | ratio | energies | information | logo | |
|---|---|---|---|---|---|---|---|---|---|
| 56 | suppressed | 0.706996 | -9177.028204 | 0.972033 | 0.133311 | 7.291495 | -7053,.. | 2.362101 | |
| 142 | suppressed | 0.706961 | -10863.349327 | 0.964528 | 0.124707 | 7.734337 | -5334,.. | 2.310951 | |
| 157 | suppressed | 0.706862 | -9727.766114 | 0.966156 | 0.135750 | 7.117146 | -6577,.. | 2.317627 | |
| 148 | suppressed | 0.706826 | -11675.698748 | 0.967140 | 0.134469 | 7.192312 | -4909,.. | 2.269945 | |
| 163 | suppressed | 0.706807 | -11486.910299 | 0.965590 | 0.136809 | 7.057952 | -5785,.. | 2.214978 | |
| 131 | suppressed | 0.706774 | -9728.752465 | 0.979635 | 0.121770 | 8.044950 | -5290,.. | 2.300738 | |
| 106 | suppressed | 0.706773 | -7915.539991 | 0.952564 | 0.124481 | 7.652290 | -8470,.. | 2.364914 | |
| 147 | suppressed | 0.706685 | -10933.515501 | 0.965363 | 0.132006 | 7.313014 | -4346,.. | 2.340699 | |
| 70 | suppressed | 0.706631 | -12214.012249 | 0.963979 | 0.139102 | 6.930042 | -5148,.. | 2.223107 | |
| 89 | suppressed | 0.706580 | -11248.116017 | 0.968648 | 0.136544 | 7.094042 | -6221,.. | 2.123406 | |
| 170 | suppressed | 0.706539 | -12052.588484 | 0.963785 | 0.144152 | 6.685888 | -4561,.. | 2.256433 | |
| 113 | suppressed | 0.706530 | -11913.812017 | 0.969498 | 0.126538 | 7.661721 | -4487,.. | 2.247607 | |
| 52 | suppressed | 0.706515 | -12926.542010 | 0.965676 | 0.140475 | 6.874351 | -4724,.. | 2.109999 | |
| 2 | suppressed | 0.706428 | -12557.874320 | 0.958903 | 0.147678 | 6.493219 | -4832,.. | 2.246795 | |
| 193 | suppressed | 0.706419 | -13273.206683 | 0.974611 | 0.142844 | 6.822906 | -4308,.. | 2.144290 | |
| 67 | suppressed | 0.706398 | -12923.995193 | 0.967148 | 0.137279 | 7.045115 | -4610,.. | 2.111763 | |
| 115 | suppressed | 0.706326 | -12983.544326 | 0.972911 | 0.146713 | 6.631377 | -5042,.. | 2.079642 | |
| 59 | suppressed | 0.706294 | -12176.505872 | 0.965304 | 0.148764 | 6.488806 | -5943,.. | 2.064881 | |
| 0 | suppressed | 0.706252 | -12343.309172 | 0.956334 | 0.143811 | 6.649934 | -5709,.. | 2.072961 | |
| 117 | suppressed | 0.706111 | -12177.537911 | 0.970942 | 0.134118 | 7.239444 | -3974,.. | 2.095936 | |
| 18 | suppressed | 0.706074 | -13749.828576 | 0.975109 | 0.156092 | 6.246997 | -4223,.. | 2.058359 | |
| 127 | suppressed | 0.706036 | -10854.233141 | 0.953125 | 0.141681 | 6.727261 | -6960,.. | 2.116820 | |
| 79 | suppressed | 0.705990 | -12794.544096 | 0.974744 | 0.141340 | 6.896470 | -4728,.. | 2.046540 | |
| 27 | suppressed | 0.705884 | -10317.128912 | 0.967302 | 0.150407 | 6.431250 | -4349,.. | 2.238211 | |
| 77 | suppressed | 0.705877 | -14307.909056 | 0.969338 | 0.152928 | 6.338521 | -4033,.. | 1.948135 | |
| 5 | suppressed | 0.705833 | -13465.399731 | 0.968937 | 0.154969 | 6.252474 | -4878,.. | 1.966427 | |
| 28 | suppressed | 0.705824 | -13257.450651 | 0.970967 | 0.151509 | 6.408643 | -5022,.. | 2.031888 | |
| 12 | suppressed | 0.705811 | -11814.471246 | 0.965943 | 0.154481 | 6.252820 | -6645,.. | 2.010246 | |
| 190 | suppressed | 0.705788 | -14142.449874 | 0.973330 | 0.155344 | 6.265625 | -4195,.. | 2.043055 | |
| 130 | suppressed | 0.705769 | -13728.165226 | 0.967874 | 0.140905 | 6.868995 | -4015,.. | 2.022657 | |
| 105 | suppressed | 0.705719 | -14379.041850 | 0.978379 | 0.154051 | 6.350986 | -4013,.. | 1.993526 | |
| 111 | suppressed | 0.705703 | -12947.902083 | 0.970576 | 0.149884 | 6.475512 | -4823,.. | 1.953821 | |
| 135 | suppressed | 0.705640 | -14039.619117 | 0.973976 | 0.150254 | 6.482181 | -3802,.. | 2.004695 | |
| 43 | suppressed | 0.705622 | -14487.299010 | 0.967699 | 0.160324 | 6.035891 | -3927,.. | 1.978947 | |
| 83 | suppressed | 0.705541 | -14577.425748 | 0.981237 | 0.163550 | 5.999600 | -4077,.. | 1.946875 | |
| 178 | suppressed | 0.705485 | -14061.491830 | 0.968489 | 0.159125 | 6.086355 | -3858,.. | 1.959808 | |
| 32 | suppressed | 0.705459 | -14266.668919 | 0.967770 | 0.170622 | 5.672020 | -4150,.. | 2.023246 | |
| 26 | suppressed | 0.705413 | -14701.042064 | 0.978571 | 0.169238 | 5.782209 | -3986,.. | 1.912349 | |
| 54 | suppressed | 0.705339 | -14959.846587 | 0.975378 | 0.162206 | 6.013202 | -3678,.. | 1.902153 | |
| 45 | suppressed | 0.705324 | -13361.090070 | 0.968532 | 0.159637 | 6.067098 | -4836,.. | 2.026302 | |
| 114 | suppressed | 0.705315 | -12670.666987 | 0.976832 | 0.158935 | 6.146115 | -6041,.. | 1.986389 | |
| 24 | suppressed | 0.705314 | -14054.154743 | 0.980018 | 0.155556 | 6.300092 | -4528,.. | 1.938419 | |
| 14 | suppressed | 0.705282 | -14088.876587 | 0.974698 | 0.152134 | 6.406837 | -3837,.. | 2.043591 | |
| 21 | suppressed | 0.705188 | -8305.931026 | 0.962993 | 0.144902 | 6.645806 | -8188,.. | 2.340227 | |
| 48 | suppressed | 0.705135 | -14723.661981 | 0.972560 | 0.172590 | 5.635099 | -3644,.. | 1.984678 | |
| 36 | suppressed | 0.705020 | -14820.934033 | 0.977018 | 0.168904 | 5.784454 | -3864,.. | 1.823897 | |
| 120 | suppressed | 0.704992 | -14451.163255 | 0.978946 | 0.175247 | 5.586075 | -4413,.. | 1.908539 | |
| 140 | suppressed | 0.704955 | -12370.971274 | 0.974654 | 0.156738 | 6.218381 | -6621,.. | 1.886788 | |
| 124 | suppressed | 0.704954 | -11465.242231 | 0.967891 | 0.169187 | 5.720832 | -7332,.. | 1.946722 | |
| 139 | suppressed | 0.704909 | -13699.534998 | 0.972513 | 0.166307 | 5.847711 | -4736,.. | 2.000360 | |
| 100 | suppressed | 0.704803 | -15229.762285 | 0.991523 | 0.166159 | 5.967320 | -3816,.. | 1.799674 | |
| 187 | suppressed | 0.704578 | -14778.630303 | 0.961413 | 0.175678 | 5.472591 | -4233,.. | 1.857307 | |
| 181 | suppressed | 0.704422 | -15758.529236 | 0.994799 | 0.192793 | 5.159941 | -3386,.. | 1.755134 | |
| 3 | suppressed | 0.704033 | -14230.229409 | 0.971167 | 0.173523 | 5.596750 | -3634,.. | 1.964398 | |
| 7 | suppressed | 0.704010 | -13187.195178 | 0.972048 | 0.172629 | 5.630850 | -5467,.. | 2.011791 | |
| 25 | suppressed | 0.703962 | -15155.136559 | 0.974104 | 0.186596 | 5.220385 | -3974,.. | 1.817961 | |
| 60 | suppressed | 0.703665 | -15048.014390 | 1.002399 | 0.193495 | 5.180495 | -4302,.. | 1.708500 | |
| 46 | suppressed | 0.703537 | -14556.597842 | 0.979293 | 0.175544 | 5.578615 | -4895,.. | 1.762471 | |
| 16 | suppressed | 0.703486 | -15225.570281 | 0.971553 | 0.173898 | 5.586916 | -3398,.. | 1.814056 | |
| 123 | suppressed | 0.703194 | -16545.540324 | 0.981143 | 0.209328 | 4.687109 | -3038,.. | 1.554067 | |
| 72 | suppressed | 0.703165 | -15850.112081 | 0.973095 | 0.203243 | 4.787836 | -3353,.. | 1.773192 | |
| 75 | suppressed | 0.698884 | -18233.733094 | 0.996786 | 0.311778 | 3.197099 | -2342,.. | 1.082989 | |
| 38 | suppressed | 0.695414 | -13623.049624 | 0.994722 | 0.202557 | 4.910824 | -3831,.. | 2.396506 | |
| 138 | suppressed | 0.694687 | -15089.902655 | 1.014728 | 0.220736 | 4.597027 | -3338,.. | 2.119809 | |
| 23 | suppressed | 0.694529 | -16078.835412 | 1.013021 | 0.181807 | 5.571966 | 3802,.. | 1.433959 | |
| 152 | suppressed | 0.694514 | -15316.049402 | 0.999945 | 0.148745 | 6.722527 | 4993,.. | 1.550912 | |
| 11 | suppressed | 0.694501 | -16062.485560 | 0.998670 | 0.169212 | 5.901884 | 3614,.. | 1.456914 | |
| 112 | suppressed | 0.694494 | -13742.600462 | 0.997189 | 0.111924 | 8.909516 | 6957,.. | 1.689719 | |
| 175 | suppressed | 0.694472 | -14164.875513 | 0.996417 | 0.138673 | 7.185389 | 5902,.. | 1.575634 | |
| 66 | suppressed | 0.694472 | -15002.774532 | 1.008638 | 0.126429 | 7.977893 | 5820,.. | 1.604546 | |
| 168 | suppressed | 0.694467 | -13724.735721 | 1.004490 | 0.133346 | 7.532940 | 6287,.. | 1.566653 | |
| 184 | suppressed | 0.694458 | -15387.275370 | 0.994654 | 0.188431 | 5.278623 | 2995,.. | 1.401132 | |
| 133 | suppressed | 0.694448 | -12414.691250 | 0.994884 | 0.109906 | 9.052176 | 9312,.. | 1.711392 | |
| 93 | suppressed | 0.694435 | -14169.410972 | 1.002387 | 0.124961 | 8.021589 | 9148,.. | 1.639763 | |
| 62 | suppressed | 0.694433 | -12264.565203 | 1.005852 | 0.122552 | 8.207558 | 9222,.. | 1.686218 | |
| 183 | suppressed | 0.694432 | -14371.123342 | 0.994981 | 0.193133 | 5.151792 | 3347,.. | 1.392146 | |
| 53 | suppressed | 0.694412 | -15834.883136 | 1.005492 | 0.156837 | 6.411059 | 4776,.. | 1.484806 | |
| 96 | suppressed | 0.694405 | -15150.371077 | 1.000244 | 0.148052 | 6.756014 | 3849,.. | 1.513579 | |
| 68 | suppressed | 0.694395 | -13458.785667 | 0.997705 | 0.120960 | 8.248227 | 8730,.. | 1.687686 | |
| 176 | suppressed | 0.694343 | -14975.228364 | 1.005385 | 0.155619 | 6.460571 | 3759,.. | 1.504388 | |
| 74 | suppressed | 0.694342 | -15150.985358 | 1.003605 | 0.131100 | 7.655257 | 7033,.. | 1.593152 | |
| 128 | suppressed | 0.694338 | -16536.467718 | 1.000250 | 0.227072 | 4.404989 | 2682,.. | 1.298022 | |
| 185 | suppressed | 0.694315 | -13738.270855 | 1.009902 | 0.164943 | 6.122722 | 4432,.. | 1.465954 | |
| 173 | suppressed | 0.694313 | -15883.089098 | 1.010970 | 0.174569 | 5.791239 | 4027,.. | 1.474913 | |
| 49 | suppressed | 0.694281 | -17244.301344 | 0.992480 | 0.222056 | 4.469505 | 2967,.. | 1.288843 | |
| 197 | suppressed | 0.694280 | -13643.979833 | 1.000934 | 0.194265 | 5.152403 | 2254,.. | 1.498778 | |
| 87 | suppressed | 0.694267 | -16629.203447 | 0.995603 | 0.191165 | 5.208070 | 2656,.. | 1.417589 | |
| 171 | suppressed | 0.694237 | -14655.811006 | 1.003676 | 0.138184 | 7.263338 | 7563,.. | 1.574202 | |
| 134 | suppressed | 0.694172 | -8769.493202 | 1.000198 | 0.090513 | 11.050361 | 14864,.. | 1.768039 | |
| 6 | suppressed | 0.694171 | -15555.987352 | 0.994548 | 0.158038 | 6.293098 | 5609,.. | 1.512444 | |
| 107 | suppressed | 0.693943 | -14159.521215 | 1.008739 | 0.152237 | 6.626090 | 3627,.. | 1.475668 | |
| 98 | suppressed | 0.693925 | -16305.551122 | 1.003250 | 0.175467 | 5.717609 | 5049,.. | 1.439092 | |
| 19 | suppressed | 0.693917 | -13874.488687 | 1.017081 | 0.181240 | 5.611794 | 4695,.. | 1.447960 | |
| 71 | suppressed | 0.693914 | -16364.352404 | 0.996756 | 0.171728 | 5.804261 | 5454,.. | 1.415694 | |
| 137 | suppressed | 0.693901 | -15117.836331 | 0.989319 | 0.156571 | 6.318654 | 2208,.. | 1.636275 | |
| 92 | suppressed | 0.693895 | -16730.019051 | 1.005339 | 0.200997 | 5.001760 | 3714,.. | 1.329585 | |
| 151 | suppressed | 0.693877 | -16347.669237 | 1.001342 | 0.167465 | 5.979419 | 5545,.. | 1.433163 | |
| 177 | suppressed | 0.693819 | -16232.167039 | 1.000425 | 0.232371 | 4.305291 | 1694,.. | 1.396094 | |
| 76 | suppressed | 0.693783 | -14796.647482 | 0.990767 | 0.143743 | 6.892647 | 1966,.. | 1.729334 | |
| 169 | suppressed | 0.693747 | -14755.260130 | 0.996178 | 0.135208 | 7.367745 | 3340,.. | 1.640316 | |
| 41 | suppressed | 0.693740 | -17832.648444 | 1.000571 | 0.273132 | 3.663323 | 2284,.. | 1.174987 | |
| 29 | suppressed | 0.693730 | -16794.469280 | 1.006752 | 0.190572 | 5.282797 | 4328,.. | 1.349449 | |
| 174 | suppressed | 0.693650 | -16425.707625 | 1.000743 | 0.190659 | 5.248862 | 2101,.. | 1.450030 | |
| 73 | suppressed | 0.693591 | -15905.953102 | 0.997153 | 0.195337 | 5.104782 | 1864,.. | 1.526860 | |
| 22 | suppressed | 0.693492 | -16180.359940 | 0.999066 | 0.145244 | 6.878535 | 4600,.. | 1.491685 | |
| 37 | suppressed | 0.693463 | -12675.489582 | 1.004201 | 0.131172 | 7.655599 | 1798,.. | 1.885736 | |
| 51 | suppressed | 0.693445 | -15151.229348 | 0.994113 | 0.132859 | 7.482468 | 6056,.. | 1.614267 | |
| 63 | suppressed | 0.693369 | -15653.765872 | 0.999983 | 0.153297 | 6.523169 | 2486,.. | 1.631502 | |
| 146 | suppressed | 0.693074 | -16487.993317 | 1.017384 | 0.179676 | 5.662324 | 6159,.. | 1.396452 | |
| 50 | suppressed | 0.693036 | -14052.624639 | 0.970005 | 0.117852 | 8.230672 | 6736,.. | 1.675559 | |
| 20 | suppressed | 0.693028 | -12364.355925 | 1.003365 | 0.115630 | 8.677385 | 3367,.. | 1.765088 | |
| 194 | suppressed | 0.692945 | -15805.385445 | 0.989678 | 0.149223 | 6.632210 | 6822,.. | 1.489779 | |
| 80 | suppressed | 0.692882 | -14273.457012 | 1.004135 | 0.139847 | 7.180234 | 10160,.. | 1.578089 | |
| 116 | suppressed | 0.692686 | -18097.210380 | 1.016817 | 0.285228 | 3.564926 | 2972,.. | 1.087331 | |
| 132 | suppressed | 0.692673 | -18432.020836 | 0.989300 | 0.318265 | 3.108419 | 1567,.. | 1.016520 | |
| 180 | suppressed | 0.692585 | -16033.236868 | 1.001339 | 0.156398 | 6.402497 | 1619,.. | 1.558806 | |
| 15 | suppressed | 0.692562 | -18077.831238 | 0.999079 | 0.290434 | 3.439956 | 1318,.. | 1.106852 | |
| 9 | suppressed | 0.692526 | -14921.955589 | 1.000281 | 0.090554 | 11.046244 | 4794,.. | 1.701320 | |
| 167 | suppressed | 0.692142 | -13294.262711 | 1.001934 | 0.249682 | 4.012837 | 927,.. | 1.347249 | |
| 65 | suppressed | 0.692015 | -15158.321299 | 0.999915 | 0.268486 | 3.724273 | 2478,.. | 1.166006 | |
| 40 | suppressed | 0.691969 | -14626.548881 | 0.998482 | 0.177429 | 5.627515 | 999,.. | 1.559129 | |
| 85 | suppressed | 0.691758 | -12029.802566 | 0.987214 | 0.056552 | 17.456793 | 5942,.. | 1.867132 | |
| 145 | suppressed | 0.691695 | -14977.745283 | 0.998701 | 0.092298 | 10.820350 | 4117,.. | 1.700267 | |
| 121 | suppressed | 0.691649 | -16852.495659 | 1.002502 | 0.198143 | 5.059479 | 1279,.. | 1.448922 | |
| 109 | suppressed | 0.691624 | 11.105179 | 0.990409 | 0.092495 | 10.707753 | -13237,.. | 2.294190 | |
| 42 | suppressed | 0.691413 | -14119.200441 | 0.999907 | 0.162818 | 6.141246 | 1923,.. | 1.683997 | |
| 102 | suppressed | 0.691146 | -15496.540162 | 1.004959 | 0.155285 | 6.471688 | 6389,.. | 1.556367 | |
| 95 | suppressed | 0.690974 | -13939.120994 | 0.999838 | 0.264063 | 3.786364 | -6590,.. | 1.228277 | |
| 88 | suppressed | 0.690820 | -14844.309111 | 1.014893 | 0.167929 | 6.043600 | 2945,.. | 1.416806 | |
| 188 | suppressed | 0.690502 | -12473.763646 | 0.994681 | 0.161716 | 6.150772 | 480,.. | 1.511505 | |
| 4 | suppressed | 0.690244 | -15635.011527 | 1.001321 | 0.339025 | 2.953533 | 808,.. | 1.074127 | |
| 182 | suppressed | 0.689944 | -14860.967741 | 1.017382 | 0.367334 | 2.769637 | 979,.. | 1.026860 | |
| 136 | suppressed | 0.689725 | -15719.484869 | 1.009689 | 0.213316 | 4.733294 | 1237,.. | 1.362489 | |
| 108 | suppressed | 0.689267 | -16868.286217 | 1.010455 | 0.398269 | 2.537115 | 1003,.. | 0.951770 | |
| 179 | suppressed | 0.688211 | -14695.841635 | 1.002931 | 0.144639 | 6.934023 | 3931,.. | 1.532106 | |
| 78 | suppressed | 0.688161 | -14766.394484 | 0.995935 | 0.285833 | 3.484327 | 269,.. | 1.178970 | |
| 58 | suppressed | 0.688110 | -14520.763303 | 0.993134 | 0.221677 | 4.480086 | -57,.. | 1.317097 | |
| 158 | suppressed | 0.687926 | -12973.184141 | 1.006087 | 0.053497 | 18.806378 | 6828,.. | 2.049839 | |
| 69 | suppressed | 0.687829 | -15418.965515 | 0.993655 | 0.194727 | 5.102813 | 67,.. | 1.446613 | |
| 97 | suppressed | 0.687648 | -14316.538926 | 0.998669 | 0.093484 | 10.682791 | 334,.. | 1.750742 | |
| 101 | suppressed | 0.687579 | -15265.916851 | 1.000543 | 0.210953 | 4.742970 | 54,.. | 1.394806 | |
| 144 | suppressed | 0.687376 | -17053.219866 | 0.996680 | 0.255810 | 3.896177 | 267,.. | 1.238990 | |
| 199 | suppressed | 0.687220 | -11473.991534 | 0.986581 | 0.147360 | 6.695019 | -67,.. | 1.615409 | |
| 150 | suppressed | 0.687185 | -18000.814671 | 0.999958 | 0.310344 | 3.222093 | 99,.. | 1.064935 | |
| 119 | suppressed | 0.687071 | -15847.464550 | 1.018856 | 0.171281 | 5.948439 | 4099,.. | 1.444477 | |
| 104 | suppressed | 0.686985 | -17326.854985 | 1.002006 | 0.357138 | 2.805658 | 456,.. | 1.042707 | |
| 159 | suppressed | 0.686388 | -12400.171387 | 0.968001 | 0.177878 | 5.441929 | -209,.. | 1.478802 | |
| 61 | suppressed | 0.686180 | -18478.487653 | 1.000314 | 0.323603 | 3.091179 | 6,.. | 0.965752 | |
| 91 | suppressed | 0.685937 | -16890.233842 | 0.990974 | 0.403501 | 2.455938 | 945,.. | 0.877447 | |
| 39 | suppressed | 0.685844 | -12491.236604 | 0.992076 | 0.189296 | 5.240883 | 14,.. | 1.431698 | |
| 84 | suppressed | 0.685798 | -16772.993369 | 1.001093 | 0.164108 | 6.100214 | 533,.. | 1.397848 | |
| 164 | suppressed | 0.685721 | -15322.547333 | 1.000197 | 0.268926 | 3.719232 | -64,.. | 1.206657 | |
| 191 | suppressed | 0.685667 | -13083.237456 | 0.997780 | 0.051077 | 19.534712 | 9402,.. | 1.822835 | |
| 34 | suppressed | 0.684785 | -19254.480430 | 1.010454 | 0.423364 | 2.386724 | 927,.. | 0.801820 | |
| 186 | suppressed | 0.684724 | -14336.818556 | 0.995270 | 0.065934 | 15.094839 | 4920,.. | 1.799114 | |
| 125 | suppressed | 0.684648 | -7343.474624 | 1.003233 | 0.003889 | 257.959484 | 13378,.. | 2.425804 | |
| 195 | suppressed | 0.684487 | -15088.262603 | 1.010624 | 0.081418 | 12.412841 | 4535,.. | 1.707507 | |
| 44 | suppressed | 0.684426 | -12602.203000 | 0.992964 | 0.118584 | 8.373496 | 2830,.. | 1.587170 | |
| 57 | suppressed | 0.684315 | -15432.307927 | 0.987260 | 0.103845 | 9.507039 | 3405,.. | 1.599209 | |
| 35 | suppressed | 0.684293 | -15977.929676 | 1.005304 | 0.146402 | 6.866719 | 3560,.. | 1.468133 | |
| 90 | suppressed | 0.683921 | -10368.230592 | 1.006699 | 0.020568 | 48.945316 | 9070,.. | 2.053029 | |
| 8 | suppressed | 0.683656 | -14324.525128 | 1.003344 | 0.067062 | 14.961539 | 3683,.. | 1.755301 | |
| 143 | suppressed | 0.683650 | -13322.100404 | 1.004908 | 0.044540 | 22.561888 | 4260,.. | 1.889706 | |
| 149 | suppressed | 0.683370 | -9704.420410 | 0.989848 | 0.040784 | 24.270205 | 3023,.. | 1.967927 | |
| 13 | suppressed | 0.683208 | -12418.264857 | 1.002041 | 0.032487 | 30.844432 | 3250,.. | 1.941987 | |
| 81 | suppressed | 0.683157 | -8065.420824 | 0.997320 | 0.026629 | 37.452357 | 5560,.. | 2.111768 | |
| 155 | suppressed | 0.683082 | -5851.033242 | 1.003461 | 0.015278 | 65.681385 | 4198,.. | 2.211873 | |
| 162 | suppressed | 0.683080 | -13541.997285 | 1.003286 | 0.067781 | 14.801812 | 2761,.. | 1.737107 | |
| 118 | suppressed | 0.682673 | -12636.113605 | 0.996464 | 0.097281 | 10.243117 | 784,.. | 1.652231 | |
| 129 | suppressed | 0.682144 | -15346.049849 | 1.001480 | 0.170969 | 5.857670 | 1160,.. | 1.378440 | |
| 10 | suppressed | 0.682071 | -15331.747861 | 0.999931 | 0.149585 | 6.684724 | 823,.. | 1.418027 | |
| 94 | suppressed | 0.681737 | -9516.486317 | 0.999176 | 0.029912 | 33.403877 | 2670,.. | 2.135083 | |
| 126 | suppressed | 0.681299 | -16683.012731 | 1.014760 | 0.192281 | 5.277496 | 1351,.. | 1.308661 | |
| 47 | suppressed | 0.681072 | -11282.463532 | 0.985347 | 0.102623 | 9.601607 | 1039,.. | 1.626720 | |
| 1 | suppressed | 0.680545 | -14839.037213 | 1.003692 | 0.083693 | 11.992469 | 829,.. | 1.717836 | |
| 31 | suppressed | 0.680447 | -14652.873022 | 1.015388 | 0.171427 | 5.923161 | 1006,.. | 1.402128 | |
| 110 | suppressed | 0.680445 | -10737.317516 | 0.996909 | 0.038054 | 26.197516 | 1706,.. | 1.984588 | |
| 103 | suppressed | 0.680326 | -9534.988898 | 1.027888 | 0.061813 | 16.629039 | 2996,.. | 1.967403 | |
| 198 | suppressed | 0.680190 | -14300.365282 | 1.009282 | 0.350376 | 2.880571 | -7534,.. | 1.057658 | |
| 64 | suppressed | 0.680186 | -13690.852850 | 0.974986 | 0.254555 | 3.830155 | 1094,.. | 1.124125 | |
| 172 | suppressed | 0.679898 | -12051.276254 | 0.978729 | 0.162753 | 6.013589 | 456,.. | 1.591660 | |
| 156 | suppressed | 0.679765 | -9414.953722 | 1.018092 | 0.040578 | 25.089760 | 809,.. | 2.035834 | |
| 86 | suppressed | 0.678314 | -18485.870253 | 1.008287 | 0.315098 | 3.199922 | 425,.. | 0.955099 | |
| 166 | suppressed | 0.674830 | -10213.109846 | 1.006027 | 0.009063 | 111.007384 | 589,.. | 2.314159 | |
| 17 | suppressed | 0.674305 | -7399.358588 | 0.994818 | 0.006210 | 160.208548 | -26,.. | 2.443177 | |
| 33 | suppressed | 0.674213 | -11999.325020 | 0.997773 | 0.026210 | 38.067764 | 947,.. | 2.166395 | |
| 153 | suppressed | 0.667498 | -10779.112703 | 0.995397 | 0.125103 | 7.956619 | 3265,.. | 1.909201 | |
| 160 | suppressed | 0.666145 | -12184.310797 | 0.953825 | 0.092485 | 10.313298 | 1992,.. | 1.802685 | |
| 154 | suppressed | 0.660428 | -11935.061554 | 1.001120 | 0.144910 | 6.908575 | 4217,.. | 1.810810 | |
| 161 | suppressed | 0.658164 | -9106.352917 | 1.010160 | 0.048330 | 20.901503 | -8894,.. | 2.054313 | |
| 122 | suppressed | 0.658141 | -12382.678106 | 1.008789 | 0.070623 | 14.284063 | -6513,.. | 1.926606 | |
| 165 | suppressed | 0.657027 | -10913.849168 | 1.014197 | 0.064141 | 15.811935 | -7706,.. | 1.997895 | |
| 192 | suppressed | 0.641364 | -3971.288308 | 0.993242 | 0.033069 | 30.035167 | 4877,.. | 2.237789 | |
| 189 | suppressed | 0.637593 | -17525.524191 | 1.015197 | 0.222545 | 4.561765 | 1681,.. | 1.244728 | |
| 30 | suppressed | 0.614605 | -1530.266321 | 0.993504 | 0.000391 | 2542.547937 | 1741,.. | 2.684948 | |
| 82 | suppressed | 0.541329 | 870.648161 | 1.005770 | 0.000019 | 53113.958037 | 5372,.. | 3.934987 | |
| 99 | suppressed | 0.507435 | -6071.440214 | 1.026697 | 0.027348 | 37.541558 | 4211,.. | 3.066611 | |
| 141 | suppressed | 0.488867 | -1285.003906 | 0.931659 | 0.009375 | 99.375559 | 845,.. | 2.713847 | |
| 55 | suppressed | 0.474265 | -12607.693930 | 1.177187 | 0.005371 | 219.186429 | 8453,.. | 1.726284 | |
| 196 | suppressed | 0.299159 | -8098.683107 | 0.946400 | 0.012435 | 76.108961 | 4247,.. | 2.373405 |
df_repetitions[:-int(len(df_repetitions)/20)] #delete 5% worst
| model | r | G0 | max binding | min binding | ratio | energies | information | logo | PCA1 | PCA2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 56 | findmotif(both_strands=False, fit_G0=True, mot... | 0.706996 | -9177.028204 | 0.972033 | 0.133311 | 7.291495 | [-7053.185480104829, 14371.734708221606, -8323... | 2.362101 | <PIL.Image.Image image mode=RGB size=1200x900 ... | 19825.322932 | -1553.438968 |
| 142 | findmotif(both_strands=False, fit_G0=True, mot... | 0.706961 | -10863.349327 | 0.964528 | 0.124707 | 7.734337 | [-5334.0554370904765, 9281.749185392397, -6488... | 2.310951 | <PIL.Image.Image image mode=RGB size=1200x900 ... | 16781.517546 | -769.408837 |
| 157 | findmotif(both_strands=False, fit_G0=True, mot... | 0.706862 | -9727.766114 | 0.966156 | 0.135750 | 7.117146 | [-6577.773398411619, 13912.727582882128, -7976... | 2.317627 | <PIL.Image.Image image mode=RGB size=1200x900 ... | 19146.576793 | -1523.318826 |
| 148 | findmotif(both_strands=False, fit_G0=True, mot... | 0.706826 | -11675.698748 | 0.967140 | 0.134469 | 7.192312 | [-4909.541315277625, 8409.74004047994, -6169.3... | 2.269945 | <PIL.Image.Image image mode=RGB size=1200x900 ... | 15561.083980 | -897.311099 |
| 163 | findmotif(both_strands=False, fit_G0=True, mot... | 0.706807 | -11486.910299 | 0.965590 | 0.136809 | 7.057952 | [-5785.075296653869, 11148.827507828752, -6951... | 2.214978 | <PIL.Image.Image image mode=RGB size=1200x900 ... | 16311.527973 | -1288.988453 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 110 | findmotif(both_strands=False, fit_G0=True, mot... | 0.680445 | -10737.317516 | 0.996909 | 0.038054 | 26.197516 | [1706.4767801559324, -4603.711766799798, 7958.... | 1.984588 | <PIL.Image.Image image mode=RGB size=1200x900 ... | -17787.806490 | -1832.951702 |
| 103 | findmotif(both_strands=False, fit_G0=True, mot... | 0.680326 | -9534.988898 | 1.027888 | 0.061813 | 16.629039 | [2996.130586825946, -2864.2646122122032, 2348.... | 1.967403 | <PIL.Image.Image image mode=RGB size=1200x900 ... | -18716.013085 | -6072.519659 |
| 198 | findmotif(both_strands=False, fit_G0=True, mot... | 0.680190 | -14300.365282 | 1.009282 | 0.350376 | 2.880571 | [-7534.602343903572, 17518.12083155398, -6352.... | 1.057658 | <PIL.Image.Image image mode=RGB size=1200x900 ... | 14420.431194 | -580.803961 |
| 64 | findmotif(both_strands=False, fit_G0=True, mot... | 0.680186 | -13690.852850 | 0.974986 | 0.254555 | 3.830155 | [1094.2766885463207, -722.0191909075406, 474.3... | 1.124125 | <PIL.Image.Image image mode=RGB size=1200x900 ... | -2341.691988 | 19657.579257 |
| 172 | findmotif(both_strands=False, fit_G0=True, mot... | 0.679898 | -12051.276254 | 0.978729 | 0.162753 | 6.013589 | [456.14039991784085, 20.44544959578485, 0.7665... | 1.591660 | <PIL.Image.Image image mode=RGB size=1200x900 ... | 3391.185915 | -12443.880252 |
181 rows × 11 columns
df_repetitions=df_repetitions[:-int(len(df_repetitions)/20)] #delete 5% worst
### compare energy matrices of ensemble using PCA
print('I: Histogram of the regression coefficients r obtained by repeated optimizaion with the subset.')
df_repetitions['r'].plot(kind='hist')
plt.show()
pca = PCA(n_components=2)
pca_2c=pca.fit_transform(df_repetitions['energies'].tolist())
df_repetitions[['PCA1', 'PCA2']]=pca_2c
print('I: 2-dimensional PCA explained %i %% of variance.' %(sum(pca.explained_variance_ratio_)*100))
if sum(pca.explained_variance_ratio_)<0.5:
print('W: 2-dimensional PCA explained only %i %% of variance' %(sum(pca.explained_variance_ratio_)*100))
print('I: Visualization of the PCA with the regression quality by color.')
df_repetitions.plot(x='PCA1', y='PCA2', kind='scatter', c='r',cmap=cm.coolwarm, edgecolors='black', linewidths=0.3)
I: Histogram of the regression coefficients r obtained by repeated optimizaion with the subset.
I: 2-dimensional PCA explained 79 % of variance. I: Visualization of the PCA with the regression quality by color.
/home/GLipps/.local/lib/python3.8/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute `n_features_` was deprecated in version 1.2 and will be removed in 1.4. Use `n_features_in_` instead. warnings.warn(msg, category=FutureWarning)
<matplotlib.axes._subplots.AxesSubplot at 0x7fe661562a30>
pca.explained_variance_ratio_
array([0.62422327, 0.17305216])
fig=df_repetitions.plot(x='PCA1', y='PCA2', kind='scatter', c='r',cmap=cm.coolwarm, edgecolors='black',
linewidths=0.3).get_figure()
fig.savefig('PCA_no_padding.svg')
mf.display_df(df_repetitions, nuc_type=NUC_TYPE)
| model | r | G0 | max binding | min binding | ratio | energies | information | logo | PCA1 | PCA2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 56 | suppressed | 0.706996 | -9177.028204 | 0.972033 | 0.133311 | 7.291495 | -7053,.. | 2.362101 | 19825.322932 | -1553.438968 | |
| 142 | suppressed | 0.706961 | -10863.349327 | 0.964528 | 0.124707 | 7.734337 | -5334,.. | 2.310951 | 16781.517546 | -769.408837 | |
| 157 | suppressed | 0.706862 | -9727.766114 | 0.966156 | 0.135750 | 7.117146 | -6577,.. | 2.317627 | 19146.576793 | -1523.318826 | |
| 148 | suppressed | 0.706826 | -11675.698748 | 0.967140 | 0.134469 | 7.192312 | -4909,.. | 2.269945 | 15561.083980 | -897.311099 | |
| 163 | suppressed | 0.706807 | -11486.910299 | 0.965590 | 0.136809 | 7.057952 | -5785,.. | 2.214978 | 16311.527973 | -1288.988453 | |
| 131 | suppressed | 0.706774 | -9728.752465 | 0.979635 | 0.121770 | 8.044950 | -5290,.. | 2.300738 | 18368.688916 | -648.153351 | |
| 106 | suppressed | 0.706773 | -7915.539991 | 0.952564 | 0.124481 | 7.652290 | -8470,.. | 2.364914 | 21767.273901 | -1973.738421 | |
| 147 | suppressed | 0.706685 | -10933.515501 | 0.965363 | 0.132006 | 7.313014 | -4346,.. | 2.340699 | 16241.576656 | -51.849408 | |
| 70 | suppressed | 0.706631 | -12214.012249 | 0.963979 | 0.139102 | 6.930042 | -5148,.. | 2.223107 | 14771.727280 | -1215.450068 | |
| 89 | suppressed | 0.706580 | -11248.116017 | 0.968648 | 0.136544 | 7.094042 | -6221,.. | 2.123406 | 17131.145911 | -1241.442253 | |
| 170 | suppressed | 0.706539 | -12052.588484 | 0.963785 | 0.144152 | 6.685888 | -4561,.. | 2.256433 | 14838.411635 | -994.248033 | |
| 113 | suppressed | 0.706530 | -11913.812017 | 0.969498 | 0.126538 | 7.661721 | -4487,.. | 2.247607 | 15168.678487 | -310.031692 | |
| 52 | suppressed | 0.706515 | -12926.542010 | 0.965676 | 0.140475 | 6.874351 | -4724,.. | 2.109999 | 13999.504943 | -816.220411 | |
| 2 | suppressed | 0.706428 | -12557.874320 | 0.958903 | 0.147678 | 6.493219 | -4832,.. | 2.246795 | 14139.117443 | -1242.466310 | |
| 193 | suppressed | 0.706419 | -13273.206683 | 0.974611 | 0.142844 | 6.822906 | -4308,.. | 2.144290 | 13243.097834 | -701.941397 | |
| 67 | suppressed | 0.706398 | -12923.995193 | 0.967148 | 0.137279 | 7.045115 | -4610,.. | 2.111763 | 14050.827382 | -776.543545 | |
| 115 | suppressed | 0.706326 | -12983.544326 | 0.972911 | 0.146713 | 6.631377 | -5042,.. | 2.079642 | 14094.253400 | -1114.332846 | |
| 59 | suppressed | 0.706294 | -12176.505872 | 0.965304 | 0.148764 | 6.488806 | -5943,.. | 2.064881 | 15677.738878 | -1563.947147 | |
| 0 | suppressed | 0.706252 | -12343.309172 | 0.956334 | 0.143811 | 6.649934 | -5709,.. | 2.072961 | 15282.208651 | -1500.888004 | |
| 117 | suppressed | 0.706111 | -12177.537911 | 0.970942 | 0.134118 | 7.239444 | -3974,.. | 2.095936 | 14931.934693 | 429.135675 | |
| 18 | suppressed | 0.706074 | -13749.828576 | 0.975109 | 0.156092 | 6.246997 | -4223,.. | 2.058359 | 12819.950581 | -817.226814 | |
| 127 | suppressed | 0.706036 | -10854.233141 | 0.953125 | 0.141681 | 6.727261 | -6960,.. | 2.116820 | 17936.973959 | -2214.055464 | |
| 79 | suppressed | 0.705990 | -12794.544096 | 0.974744 | 0.141340 | 6.896470 | -4728,.. | 2.046540 | 14346.213502 | -439.569626 | |
| 27 | suppressed | 0.705884 | -10317.128912 | 0.967302 | 0.150407 | 6.431250 | -4349,.. | 2.238211 | 16939.020766 | -681.852972 | |
| 77 | suppressed | 0.705877 | -14307.909056 | 0.969338 | 0.152928 | 6.338521 | -4033,.. | 1.948135 | 11968.350919 | -626.428754 | |
| 5 | suppressed | 0.705833 | -13465.399731 | 0.968937 | 0.154969 | 6.252474 | -4878,.. | 1.966427 | 13548.939020 | -933.322459 | |
| 28 | suppressed | 0.705824 | -13257.450651 | 0.970967 | 0.151509 | 6.408643 | -5022,.. | 2.031888 | 13742.292223 | -1352.316119 | |
| 12 | suppressed | 0.705811 | -11814.471246 | 0.965943 | 0.154481 | 6.252820 | -6645,.. | 2.010246 | 16481.500242 | -2044.963493 | |
| 190 | suppressed | 0.705788 | -14142.449874 | 0.973330 | 0.155344 | 6.265625 | -4195,.. | 2.043055 | 11921.744929 | -959.975450 | |
| 130 | suppressed | 0.705769 | -13728.165226 | 0.967874 | 0.140905 | 6.868995 | -4015,.. | 2.022657 | 12664.142830 | -352.722105 | |
| 105 | suppressed | 0.705719 | -14379.041850 | 0.978379 | 0.154051 | 6.350986 | -4013,.. | 1.993526 | 11651.704367 | -926.449523 | |
| 111 | suppressed | 0.705703 | -12947.902083 | 0.970576 | 0.149884 | 6.475512 | -4823,.. | 1.953821 | 14488.182566 | -550.496706 | |
| 135 | suppressed | 0.705640 | -14039.619117 | 0.973976 | 0.150254 | 6.482181 | -3802,.. | 2.004695 | 12390.326511 | -304.126450 | |
| 43 | suppressed | 0.705622 | -14487.299010 | 0.967699 | 0.160324 | 6.035891 | -3927,.. | 1.978947 | 11565.797799 | -943.929132 | |
| 83 | suppressed | 0.705541 | -14577.425748 | 0.981237 | 0.163550 | 5.999600 | -4077,.. | 1.946875 | 11633.274617 | -989.040420 | |
| 178 | suppressed | 0.705485 | -14061.491830 | 0.968489 | 0.159125 | 6.086355 | -3858,.. | 1.959808 | 12302.571146 | -253.736307 | |
| 32 | suppressed | 0.705459 | -14266.668919 | 0.967770 | 0.170622 | 5.672020 | -4150,.. | 2.023246 | 11857.043778 | -1355.668409 | |
| 26 | suppressed | 0.705413 | -14701.042064 | 0.978571 | 0.169238 | 5.782209 | -3986,.. | 1.912349 | 11466.745134 | -733.490110 | |
| 54 | suppressed | 0.705339 | -14959.846587 | 0.975378 | 0.162206 | 6.013202 | -3678,.. | 1.902153 | 10845.393256 | -666.955906 | |
| 45 | suppressed | 0.705324 | -13361.090070 | 0.968532 | 0.159637 | 6.067098 | -4836,.. | 2.026302 | 13407.079649 | -1470.679646 | |
| 114 | suppressed | 0.705315 | -12670.666987 | 0.976832 | 0.158935 | 6.146115 | -6041,.. | 1.986389 | 15070.371758 | -2014.172460 | |
| 24 | suppressed | 0.705314 | -14054.154743 | 0.980018 | 0.155556 | 6.300092 | -4528,.. | 1.938419 | 12638.081816 | -997.882414 | |
| 14 | suppressed | 0.705282 | -14088.876587 | 0.974698 | 0.152134 | 6.406837 | -3837,.. | 2.043591 | 11932.788908 | -1056.594018 | |
| 21 | suppressed | 0.705188 | -8305.931026 | 0.962993 | 0.144902 | 6.645806 | -8188,.. | 2.340227 | 21291.674323 | -1969.240440 | |
| 48 | suppressed | 0.705135 | -14723.661981 | 0.972560 | 0.172590 | 5.635099 | -3644,.. | 1.984678 | 11012.099537 | -1135.085789 | |
| 36 | suppressed | 0.705020 | -14820.934033 | 0.977018 | 0.168904 | 5.784454 | -3864,.. | 1.823897 | 11427.236652 | -409.257273 | |
| 120 | suppressed | 0.704992 | -14451.163255 | 0.978946 | 0.175247 | 5.586075 | -4413,.. | 1.908539 | 11918.222660 | -1354.407693 | |
| 140 | suppressed | 0.704955 | -12370.971274 | 0.974654 | 0.156738 | 6.218381 | -6621,.. | 1.886788 | 15890.019253 | -1998.100752 | |
| 124 | suppressed | 0.704954 | -11465.242231 | 0.967891 | 0.169187 | 5.720832 | -7332,.. | 1.946722 | 17430.891653 | -2504.510853 | |
| 139 | suppressed | 0.704909 | -13699.534998 | 0.972513 | 0.166307 | 5.847711 | -4736,.. | 2.000360 | 12896.912172 | -1539.780510 | |
| 100 | suppressed | 0.704803 | -15229.762285 | 0.991523 | 0.166159 | 5.967320 | -3816,.. | 1.799674 | 10755.995452 | -589.810876 | |
| 187 | suppressed | 0.704578 | -14778.630303 | 0.961413 | 0.175678 | 5.472591 | -4233,.. | 1.857307 | 11279.798987 | -1160.039032 | |
| 181 | suppressed | 0.704422 | -15758.529236 | 0.994799 | 0.192793 | 5.159941 | -3386,.. | 1.755134 | 10047.228292 | -689.029558 | |
| 3 | suppressed | 0.704033 | -14230.229409 | 0.971167 | 0.173523 | 5.596750 | -3634,.. | 1.964398 | 11583.092292 | -1240.207485 | |
| 7 | suppressed | 0.704010 | -13187.195178 | 0.972048 | 0.172629 | 5.630850 | -5467,.. | 2.011791 | 13803.072026 | -2213.981689 | |
| 25 | suppressed | 0.703962 | -15155.136559 | 0.974104 | 0.186596 | 5.220385 | -3974,.. | 1.817961 | 10780.247655 | -1283.696949 | |
| 60 | suppressed | 0.703665 | -15048.014390 | 1.002399 | 0.193495 | 5.180495 | -4302,.. | 1.708500 | 11684.422506 | -767.178284 | |
| 46 | suppressed | 0.703537 | -14556.597842 | 0.979293 | 0.175544 | 5.578615 | -4895,.. | 1.762471 | 12243.393601 | -1571.594710 | |
| 16 | suppressed | 0.703486 | -15225.570281 | 0.971553 | 0.173898 | 5.586916 | -3398,.. | 1.814056 | 10347.076762 | -929.536865 | |
| 123 | suppressed | 0.703194 | -16545.540324 | 0.981143 | 0.209328 | 4.687109 | -3038,.. | 1.554067 | 8884.689560 | -406.982919 | |
| 72 | suppressed | 0.703165 | -15850.112081 | 0.973095 | 0.203243 | 4.787836 | -3353,.. | 1.773192 | 9331.497704 | -1317.958182 | |
| 75 | suppressed | 0.698884 | -18233.733094 | 0.996786 | 0.311778 | 3.197099 | -2342,.. | 1.082989 | 6792.692662 | 194.093183 | |
| 38 | suppressed | 0.695414 | -13623.049624 | 0.994722 | 0.202557 | 4.910824 | -3831,.. | 2.396506 | 8711.652512 | -5313.586987 | |
| 138 | suppressed | 0.694687 | -15089.902655 | 1.014728 | 0.220736 | 4.597027 | -3338,.. | 2.119809 | 7852.274101 | -4247.636299 | |
| 23 | suppressed | 0.694529 | -16078.835412 | 1.013021 | 0.181807 | 5.571966 | 3802,.. | 1.433959 | -10023.210293 | -637.569871 | |
| 152 | suppressed | 0.694514 | -15316.049402 | 0.999945 | 0.148745 | 6.722527 | 4993,.. | 1.550912 | -11111.614743 | -468.139196 | |
| 11 | suppressed | 0.694501 | -16062.485560 | 0.998670 | 0.169212 | 5.901884 | 3614,.. | 1.456914 | -9724.916594 | -835.648705 | |
| 112 | suppressed | 0.694494 | -13742.600462 | 0.997189 | 0.111924 | 8.909516 | 6957,.. | 1.689719 | -13587.249131 | -837.856520 | |
| 175 | suppressed | 0.694472 | -14164.875513 | 0.996417 | 0.138673 | 7.185389 | 5902,.. | 1.575634 | -13248.641898 | -1130.557415 | |
| 66 | suppressed | 0.694472 | -15002.774532 | 1.008638 | 0.126429 | 7.977893 | 5820,.. | 1.604546 | -11564.776019 | -506.606129 | |
| 168 | suppressed | 0.694467 | -13724.735721 | 1.004490 | 0.133346 | 7.532940 | 6287,.. | 1.566653 | -14218.684203 | -1415.891101 | |
| 184 | suppressed | 0.694458 | -15387.275370 | 0.994654 | 0.188431 | 5.278623 | 2995,.. | 1.401132 | -11123.177265 | -1788.048732 | |
| 133 | suppressed | 0.694448 | -12414.691250 | 0.994884 | 0.109906 | 9.052176 | 9312,.. | 1.711392 | -15688.551544 | -1395.469676 | |
| 93 | suppressed | 0.694435 | -14169.410972 | 1.002387 | 0.124961 | 8.021589 | 9148,.. | 1.639763 | -12731.429311 | 16.455778 | |
| 62 | suppressed | 0.694433 | -12264.565203 | 1.005852 | 0.122552 | 8.207558 | 9222,.. | 1.686218 | -16241.115544 | -1299.554446 | |
| 183 | suppressed | 0.694432 | -14371.123342 | 0.994981 | 0.193133 | 5.151792 | 3347,.. | 1.392146 | -13286.716322 | -2216.436068 | |
| 53 | suppressed | 0.694412 | -15834.883136 | 1.005492 | 0.156837 | 6.411059 | 4776,.. | 1.484806 | -10460.244360 | -203.317795 | |
| 96 | suppressed | 0.694405 | -15150.371077 | 1.000244 | 0.148052 | 6.756014 | 3849,.. | 1.513579 | -11384.948211 | -1342.632396 | |
| 68 | suppressed | 0.694395 | -13458.785667 | 0.997705 | 0.120960 | 8.248227 | 8730,.. | 1.687686 | -14068.475141 | -372.668187 | |
| 176 | suppressed | 0.694343 | -14975.228364 | 1.005385 | 0.155619 | 6.460571 | 3759,.. | 1.504388 | -11672.541902 | -1730.282171 | |
| 74 | suppressed | 0.694342 | -15150.985358 | 1.003605 | 0.131100 | 7.655257 | 7033,.. | 1.593152 | -11181.438381 | 82.700783 | |
| 128 | suppressed | 0.694338 | -16536.467718 | 1.000250 | 0.227072 | 4.404989 | 2682,.. | 1.298022 | -9320.232085 | -954.802551 | |
| 185 | suppressed | 0.694315 | -13738.270855 | 1.009902 | 0.164943 | 6.122722 | 4432,.. | 1.465954 | -14439.167424 | -2029.086214 | |
| 173 | suppressed | 0.694313 | -15883.089098 | 1.010970 | 0.174569 | 5.791239 | 4027,.. | 1.474913 | -10251.825983 | -556.507463 | |
| 49 | suppressed | 0.694281 | -17244.301344 | 0.992480 | 0.222056 | 4.469505 | 2967,.. | 1.288843 | -7734.327905 | -274.185714 | |
| 197 | suppressed | 0.694280 | -13643.979833 | 1.000934 | 0.194265 | 5.152403 | 2254,.. | 1.498778 | -13408.452129 | -4116.620574 | |
| 87 | suppressed | 0.694267 | -16629.203447 | 0.995603 | 0.191165 | 5.208070 | 2656,.. | 1.417589 | -8220.411749 | -1115.364132 | |
| 171 | suppressed | 0.694237 | -14655.811006 | 1.003676 | 0.138184 | 7.263338 | 7563,.. | 1.574202 | -12387.416218 | -72.885598 | |
| 134 | suppressed | 0.694172 | -8769.493202 | 1.000198 | 0.090513 | 11.050361 | 14864,.. | 1.768039 | -21542.687741 | -2743.714968 | |
| 6 | suppressed | 0.694171 | -15555.987352 | 0.994548 | 0.158038 | 6.293098 | 5609,.. | 1.512444 | -10892.025742 | 25.127517 | |
| 107 | suppressed | 0.693943 | -14159.521215 | 1.008739 | 0.152237 | 6.626090 | 3627,.. | 1.475668 | -13374.189378 | -2417.045582 | |
| 98 | suppressed | 0.693925 | -16305.551122 | 1.003250 | 0.175467 | 5.717609 | 5049,.. | 1.439092 | -9610.628337 | 425.002572 | |
| 19 | suppressed | 0.693917 | -13874.488687 | 1.017081 | 0.181240 | 5.611794 | 4695,.. | 1.447960 | -14482.376802 | -1655.864059 | |
| 71 | suppressed | 0.693914 | -16364.352404 | 0.996756 | 0.171728 | 5.804261 | 5454,.. | 1.415694 | -9540.638665 | 400.032835 | |
| 137 | suppressed | 0.693901 | -15117.836331 | 0.989319 | 0.156571 | 6.318654 | 2208,.. | 1.636275 | -9712.776640 | -3152.350007 | |
| 92 | suppressed | 0.693895 | -16730.019051 | 1.005339 | 0.200997 | 5.001760 | 3714,.. | 1.329585 | -9253.567844 | 53.840156 | |
| 151 | suppressed | 0.693877 | -16347.669237 | 1.001342 | 0.167465 | 5.979419 | 5545,.. | 1.433163 | -9411.976001 | 345.875164 | |
| 177 | suppressed | 0.693819 | -16232.167039 | 1.000425 | 0.232371 | 4.305291 | 1694,.. | 1.396094 | -8720.478226 | -2582.689007 | |
| 76 | suppressed | 0.693783 | -14796.647482 | 0.990767 | 0.143743 | 6.892647 | 1966,.. | 1.729334 | -9296.541519 | -4186.106620 | |
| 169 | suppressed | 0.693747 | -14755.260130 | 0.996178 | 0.135208 | 7.367745 | 3340,.. | 1.640316 | -11050.095772 | -2165.412654 | |
| 41 | suppressed | 0.693740 | -17832.648444 | 1.000571 | 0.273132 | 3.663323 | 2284,.. | 1.174987 | -6705.237699 | -690.279919 | |
| 29 | suppressed | 0.693730 | -16794.469280 | 1.006752 | 0.190572 | 5.282797 | 4328,.. | 1.349449 | -9075.294297 | 259.033757 | |
| 174 | suppressed | 0.693650 | -16425.707625 | 1.000743 | 0.190659 | 5.248862 | 2101,.. | 1.450030 | -8047.013677 | -2143.823229 | |
| 73 | suppressed | 0.693591 | -15905.953102 | 0.997153 | 0.195337 | 5.104782 | 1864,.. | 1.526860 | -8521.780361 | -2896.593651 | |
| 22 | suppressed | 0.693492 | -16180.359940 | 0.999066 | 0.145244 | 6.878535 | 4600,.. | 1.491685 | -9230.592814 | -54.956971 | |
| 37 | suppressed | 0.693463 | -12675.489582 | 1.004201 | 0.131172 | 7.655599 | 1798,.. | 1.885736 | -10392.263597 | -7123.273765 | |
| 51 | suppressed | 0.693445 | -15151.229348 | 0.994113 | 0.132859 | 7.482468 | 6056,.. | 1.614267 | -11064.439221 | -189.300936 | |
| 63 | suppressed | 0.693369 | -15653.765872 | 0.999983 | 0.153297 | 6.523169 | 2486,.. | 1.631502 | -8682.278627 | -2330.610230 | |
| 146 | suppressed | 0.693074 | -16487.993317 | 1.017384 | 0.179676 | 5.662324 | 6159,.. | 1.396452 | -9417.684588 | 834.733301 | |
| 50 | suppressed | 0.693036 | -14052.624639 | 0.970005 | 0.117852 | 8.230672 | 6736,.. | 1.675559 | -13023.202721 | 16.495359 | |
| 20 | suppressed | 0.693028 | -12364.355925 | 1.003365 | 0.115630 | 8.677385 | 3367,.. | 1.765088 | -14711.046014 | -4489.799946 | |
| 194 | suppressed | 0.692945 | -15805.385445 | 0.989678 | 0.149223 | 6.632210 | 6822,.. | 1.489779 | -9956.787115 | 256.602057 | |
| 80 | suppressed | 0.692882 | -14273.457012 | 1.004135 | 0.139847 | 7.180234 | 10160,.. | 1.578089 | -12072.029588 | 1076.776803 | |
| 116 | suppressed | 0.692686 | -18097.210380 | 1.016817 | 0.285228 | 3.564926 | 2972,.. | 1.087331 | -7160.449564 | 593.454611 | |
| 132 | suppressed | 0.692673 | -18432.020836 | 0.989300 | 0.318265 | 3.108419 | 1567,.. | 1.016520 | -5534.968765 | -807.381211 | |
| 180 | suppressed | 0.692585 | -16033.236868 | 1.001339 | 0.156398 | 6.402497 | 1619,.. | 1.558806 | -7120.521525 | -3753.549888 | |
| 15 | suppressed | 0.692562 | -18077.831238 | 0.999079 | 0.290434 | 3.439956 | 1318,.. | 1.106852 | -5624.860606 | -1572.510799 | |
| 9 | suppressed | 0.692526 | -14921.955589 | 1.000281 | 0.090554 | 11.046244 | 4794,.. | 1.701320 | -10596.164661 | -1395.522557 | |
| 167 | suppressed | 0.692142 | -13294.262711 | 1.001934 | 0.249682 | 4.012837 | 927,.. | 1.347249 | 3253.720938 | 18514.245775 | |
| 65 | suppressed | 0.692015 | -15158.321299 | 0.999915 | 0.268486 | 3.724273 | 2478,.. | 1.166006 | -12583.383401 | -1746.586390 | |
| 40 | suppressed | 0.691969 | -14626.548881 | 0.998482 | 0.177429 | 5.627515 | 999,.. | 1.559129 | -8725.830443 | -5958.660394 | |
| 85 | suppressed | 0.691758 | -12029.802566 | 0.987214 | 0.056552 | 17.456793 | 5942,.. | 1.867132 | -15302.839376 | -3199.799481 | |
| 145 | suppressed | 0.691695 | -14977.745283 | 0.998701 | 0.092298 | 10.820350 | 4117,.. | 1.700267 | -10412.430478 | -1534.004474 | |
| 121 | suppressed | 0.691649 | -16852.495659 | 1.002502 | 0.198143 | 5.059479 | 1279,.. | 1.448922 | -5766.831824 | -3153.164930 | |
| 109 | suppressed | 0.691624 | 11.105179 | 0.990409 | 0.092495 | 10.707753 | -13237,.. | 2.294190 | 32030.184953 | 742.500663 | |
| 42 | suppressed | 0.691413 | -14119.200441 | 0.999907 | 0.162818 | 6.141246 | 1923,.. | 1.683997 | -10660.683793 | -4411.154839 | |
| 102 | suppressed | 0.691146 | -15496.540162 | 1.004959 | 0.155285 | 6.471688 | 6389,.. | 1.556367 | -9758.924938 | 283.301817 | |
| 95 | suppressed | 0.690974 | -13939.120994 | 0.999838 | 0.264063 | 3.786364 | -6590,.. | 1.228277 | 14836.846520 | -360.605470 | |
| 88 | suppressed | 0.690820 | -14844.309111 | 1.014893 | 0.167929 | 6.043600 | 2945,.. | 1.416806 | -12693.119033 | -1712.761478 | |
| 188 | suppressed | 0.690502 | -12473.763646 | 0.994681 | 0.161716 | 6.150772 | 480,.. | 1.511505 | 3134.442980 | 19726.771878 | |
| 4 | suppressed | 0.690244 | -15635.011527 | 1.001321 | 0.339025 | 2.953533 | 808,.. | 1.074127 | 997.594195 | 15398.035053 | |
| 182 | suppressed | 0.689944 | -14860.967741 | 1.017382 | 0.367334 | 2.769637 | 979,.. | 1.026860 | 373.566810 | 17919.754793 | |
| 136 | suppressed | 0.689725 | -15719.484869 | 1.009689 | 0.213316 | 4.733294 | 1237,.. | 1.362489 | 1993.645529 | 13282.476210 | |
| 108 | suppressed | 0.689267 | -16868.286217 | 1.010455 | 0.398269 | 2.537115 | 1003,.. | 0.951770 | 500.213960 | 13192.986270 | |
| 179 | suppressed | 0.688211 | -14695.841635 | 1.002931 | 0.144639 | 6.934023 | 3931,.. | 1.532106 | -12959.688972 | -216.249924 | |
| 78 | suppressed | 0.688161 | -14766.394484 | 0.995935 | 0.285833 | 3.484327 | 269,.. | 1.178970 | 2963.916969 | -9866.555736 | |
| 58 | suppressed | 0.688110 | -14520.763303 | 0.993134 | 0.221677 | 4.480086 | -57,.. | 1.317097 | 3121.352760 | -10271.443008 | |
| 158 | suppressed | 0.687926 | -12973.184141 | 1.006087 | 0.053497 | 18.806378 | 6828,.. | 2.049839 | -11560.467523 | -3524.108415 | |
| 69 | suppressed | 0.687829 | -15418.965515 | 0.993655 | 0.194727 | 5.102813 | 67,.. | 1.446613 | 2510.251568 | -9048.627678 | |
| 97 | suppressed | 0.687648 | -14316.538926 | 0.998669 | 0.093484 | 10.682791 | 334,.. | 1.750742 | 5900.149692 | 11360.419684 | |
| 101 | suppressed | 0.687579 | -15265.916851 | 1.000543 | 0.210953 | 4.742970 | 54,.. | 1.394806 | 2837.518386 | -9249.189096 | |
| 144 | suppressed | 0.687376 | -17053.219866 | 0.996680 | 0.255810 | 3.896177 | 267,.. | 1.238990 | 2153.952743 | -6639.572600 | |
| 199 | suppressed | 0.687220 | -11473.991534 | 0.986581 | 0.147360 | 6.695019 | -67,.. | 1.615409 | 3719.498433 | -14477.496567 | |
| 150 | suppressed | 0.687185 | -18000.814671 | 0.999958 | 0.310344 | 3.222093 | 99,.. | 1.064935 | 1899.399844 | -5308.274796 | |
| 119 | suppressed | 0.687071 | -15847.464550 | 1.018856 | 0.171281 | 5.948439 | 4099,.. | 1.444477 | -11383.878598 | 737.467934 | |
| 104 | suppressed | 0.686985 | -17326.854985 | 1.002006 | 0.357138 | 2.805658 | 456,.. | 1.042707 | 2365.938768 | -6175.180399 | |
| 159 | suppressed | 0.686388 | -12400.171387 | 0.968001 | 0.177878 | 5.441929 | -209,.. | 1.478802 | 4078.192048 | -12958.317469 | |
| 61 | suppressed | 0.686180 | -18478.487653 | 1.000314 | 0.323603 | 3.091179 | 6,.. | 0.965752 | 1754.121881 | -4706.724191 | |
| 91 | suppressed | 0.685937 | -16890.233842 | 0.990974 | 0.403501 | 2.455938 | 945,.. | 0.877447 | -500.576252 | 13477.920046 | |
| 39 | suppressed | 0.685844 | -12491.236604 | 0.992076 | 0.189296 | 5.240883 | 14,.. | 1.431698 | 4059.983855 | -12945.296033 | |
| 84 | suppressed | 0.685798 | -16772.993369 | 1.001093 | 0.164108 | 6.100214 | 533,.. | 1.397848 | 4062.851661 | 8565.301443 | |
| 164 | suppressed | 0.685721 | -15322.547333 | 1.000197 | 0.268926 | 3.719232 | -64,.. | 1.206657 | 2862.452431 | -9067.590347 | |
| 191 | suppressed | 0.685667 | -13083.237456 | 0.997780 | 0.051077 | 19.534712 | 9402,.. | 1.822835 | -14663.426470 | 925.053849 | |
| 34 | suppressed | 0.684785 | -19254.480430 | 1.010454 | 0.423364 | 2.386724 | 927,.. | 0.801820 | 2243.350482 | 6157.224735 | |
| 186 | suppressed | 0.684724 | -14336.818556 | 0.995270 | 0.065934 | 15.094839 | 4920,.. | 1.799114 | -12694.830772 | 949.860274 | |
| 125 | suppressed | 0.684648 | -7343.474624 | 1.003233 | 0.003889 | 257.959484 | 13378,.. | 2.425804 | -16410.723754 | -8085.906606 | |
| 195 | suppressed | 0.684487 | -15088.262603 | 1.010624 | 0.081418 | 12.412841 | 4535,.. | 1.707507 | -11767.027918 | 1212.078376 | |
| 44 | suppressed | 0.684426 | -12602.203000 | 0.992964 | 0.118584 | 8.373496 | 2830,.. | 1.587170 | -16689.161603 | -1470.724165 | |
| 57 | suppressed | 0.684315 | -15432.307927 | 0.987260 | 0.103845 | 9.507039 | 3405,.. | 1.599209 | -11344.106114 | 1090.086289 | |
| 35 | suppressed | 0.684293 | -15977.929676 | 1.005304 | 0.146402 | 6.866719 | 3560,.. | 1.468133 | -10904.424687 | 1216.334643 | |
| 90 | suppressed | 0.683921 | -10368.230592 | 1.006699 | 0.020568 | 48.945316 | 9070,.. | 2.053029 | -18137.990629 | 1116.859320 | |
| 8 | suppressed | 0.683656 | -14324.525128 | 1.003344 | 0.067062 | 14.961539 | 3683,.. | 1.755301 | -12820.389101 | 1245.937888 | |
| 143 | suppressed | 0.683650 | -13322.100404 | 1.004908 | 0.044540 | 22.561888 | 4260,.. | 1.889706 | -13977.907723 | 1415.837901 | |
| 149 | suppressed | 0.683370 | -9704.420410 | 0.989848 | 0.040784 | 24.270205 | 3023,.. | 1.967927 | -20216.342673 | -1731.741537 | |
| 13 | suppressed | 0.683208 | -12418.264857 | 1.002041 | 0.032487 | 30.844432 | 3250,.. | 1.941987 | -14761.860714 | 1700.616363 | |
| 81 | suppressed | 0.683157 | -8065.420824 | 0.997320 | 0.026629 | 37.452357 | 5560,.. | 2.111768 | -22311.211588 | -1943.523849 | |
| 155 | suppressed | 0.683082 | -5851.033242 | 1.003461 | 0.015278 | 65.681385 | 4198,.. | 2.211873 | -23059.925071 | 1637.390623 | |
| 162 | suppressed | 0.683080 | -13541.997285 | 1.003286 | 0.067781 | 14.801812 | 2761,.. | 1.737107 | -14044.565526 | 979.855649 | |
| 118 | suppressed | 0.682673 | -12636.113605 | 0.996464 | 0.097281 | 10.243117 | 784,.. | 1.652231 | -1953.723729 | 18329.698956 | |
| 129 | suppressed | 0.682144 | -15346.049849 | 1.001480 | 0.170969 | 5.857670 | 1160,.. | 1.378440 | -1449.402411 | 14096.422182 | |
| 10 | suppressed | 0.682071 | -15331.747861 | 0.999931 | 0.149585 | 6.684724 | 823,.. | 1.418027 | -1666.154083 | 13136.159489 | |
| 94 | suppressed | 0.681737 | -9516.486317 | 0.999176 | 0.029912 | 33.403877 | 2670,.. | 2.135083 | -18170.578388 | 779.589003 | |
| 126 | suppressed | 0.681299 | -16683.012731 | 1.014760 | 0.192281 | 5.277496 | 1351,.. | 1.308661 | -9672.752252 | 1127.767688 | |
| 47 | suppressed | 0.681072 | -11282.463532 | 0.985347 | 0.102623 | 9.601607 | 1039,.. | 1.626720 | -1918.956494 | 22000.990635 | |
| 1 | suppressed | 0.680545 | -14839.037213 | 1.003692 | 0.083693 | 11.992469 | 829,.. | 1.717836 | -676.043069 | 10562.278890 | |
| 31 | suppressed | 0.680447 | -14652.873022 | 1.015388 | 0.171427 | 5.923161 | 1006,.. | 1.402128 | -1204.504112 | 15714.993221 | |
| 110 | suppressed | 0.680445 | -10737.317516 | 0.996909 | 0.038054 | 26.197516 | 1706,.. | 1.984588 | -17787.806490 | -1832.951702 | |
| 103 | suppressed | 0.680326 | -9534.988898 | 1.027888 | 0.061813 | 16.629039 | 2996,.. | 1.967403 | -18716.013085 | -6072.519659 | |
| 198 | suppressed | 0.680190 | -14300.365282 | 1.009282 | 0.350376 | 2.880571 | -7534,.. | 1.057658 | 14420.431194 | -580.803961 | |
| 64 | suppressed | 0.680186 | -13690.852850 | 0.974986 | 0.254555 | 3.830155 | 1094,.. | 1.124125 | -2341.691988 | 19657.579257 | |
| 172 | suppressed | 0.679898 | -12051.276254 | 0.978729 | 0.162753 | 6.013589 | 456,.. | 1.591660 | 3391.185915 | -12443.880252 | |
| 156 | suppressed | 0.679765 | -9414.953722 | 1.018092 | 0.040578 | 25.089760 | 809,.. | 2.035834 | -1694.158014 | 22864.795059 | |
| 86 | suppressed | 0.678314 | -18485.870253 | 1.008287 | 0.315098 | 3.199922 | 425,.. | 0.955099 | -1446.853313 | 7834.074132 | |
| 166 | suppressed | 0.674830 | -10213.109846 | 1.006027 | 0.009063 | 111.007384 | 589,.. | 2.314159 | 1374.735976 | 15516.958973 | |
| 17 | suppressed | 0.674305 | -7399.358588 | 0.994818 | 0.006210 | 160.208548 | -26,.. | 2.443177 | 6976.009462 | 18384.433629 | |
| 33 | suppressed | 0.674213 | -11999.325020 | 0.997773 | 0.026210 | 38.067764 | 947,.. | 2.166395 | 2392.176925 | 14258.187687 | |
| 153 | suppressed | 0.667498 | -10779.112703 | 0.995397 | 0.125103 | 7.956619 | 3265,.. | 1.909201 | 1601.895608 | -14270.021543 | |
| 160 | suppressed | 0.666145 | -12184.310797 | 0.953825 | 0.092485 | 10.313298 | 1992,.. | 1.802685 | 1670.779356 | -11301.186076 | |
| 154 | suppressed | 0.660428 | -11935.061554 | 1.001120 | 0.144910 | 6.908575 | 4217,.. | 1.810810 | 1509.728158 | -11271.466543 | |
| 161 | suppressed | 0.658164 | -9106.352917 | 1.010160 | 0.048330 | 20.901503 | -8894,.. | 2.054313 | 17810.362985 | 2576.397405 |
# visualisation of the motif with the highest r
print('I: Best motif according to r from the repeated optimizations.')
print('I: PCA components: %i, %i' %(df_repetitions.iloc[0]['PCA1'], df_repetitions.iloc[0]['PCA2']))
model_best_repetition=df_repetitions.iloc[0]['model']
model_best_repetition.analyse_motif(X,y, THRESHOLD, nuc_type=NUC_TYPE)
# store results and display stages
STAGES.append('best repetition', model_best_repetition)
mf.display_df(STAGES.df, nuc_type=NUC_TYPE)
I: Best motif according to r from the repeated optimizations. I: PCA components: 19825, -1553 I: energy matrix and logos:
A C G T
0 -7053 14371 -8323 1004
1 5640 -4676 5637 -6602
2 349 -2266 2198 -281
3 -790 1047 965 -1221
I: summed absolute energies of each position:
0 30752
1 22557
2 5096
3 4024
dtype: int64
I: averaged summed energy over all positions: 15607
I: Mean and Standard Deviation for the Free Energy G to all subsequences of all probes: -9109 +/- 9135
I: Plot of the Occupancy of a subsite as the function of the Free Energy G
overlaid with the distribution of the Free Energy of all subsites.
I: There shall be only a small overlap of both curves. i.e. only the most negative Free Energies
lead to a measurable occupancy.
I: Calculated occupancy over all subsite of a single probe: binding: 0.13331 .. 0.97203 (ratio: 7.3) I: number of probes: 3149 I: Pearson Correlation r: 0.7070 I: mean absolute error: 0.1028
I: Classification performance AUROC: 0.8295
| stage | protein | # probes | motif length | r | AUROC | G0 | G0 fitted | ratio | max binding | min binding | energies | model | logo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | best repetition | T7 primase | 3149 | 4 | 0.706996 | 0.829541 | -9177.028204 | True | 7.291495 | 0.972033 | 0.133311 | -7053,.. | suppressed |
# refit model to minimize mae
last_model=STAGES.df.at[max(STAGES.df.index),'model']
refitted_model_best_repetition=model_best_repetition.refit_mae(X,y)
# print & display main results
refitted_model_best_repetition.analyse_motif(X,y, THRESHOLD, nuc_type=NUC_TYPE)
# store results and display stages
STAGES.append('mae optimzed', refitted_model_best_repetition)
mf.display_df(STAGES.df, nuc_type=NUC_TYPE)
Optimization terminated successfully.
Current function value: 0.098459
Iterations: 8
Function evaluations: 1734
I: energy matrix and logos:
A C G T
0 -6638 15358 -8211 -508
1 5720 -4656 5614 -6678
2 35 -2089 2203 -149
3 -482 1071 713 -1302
I: summed absolute energies of each position:
0 30716
1 22669
2 4478
3 3569
dtype: int64
I: averaged summed energy over all positions: 15358
I: Mean and Standard Deviation for the Free Energy G to all subsequences of all probes: -9090 +/- 9549
I: Plot of the Occupancy of a subsite as the function of the Free Energy G
overlaid with the distribution of the Free Energy of all subsites.
I: There shall be only a small overlap of both curves. i.e. only the most negative Free Energies
lead to a measurable occupancy.
I: Calculated occupancy over all subsite of a single probe: binding: 0.13499 .. 0.97839 (ratio: 7.3) I: number of probes: 3149 I: Pearson Correlation r: 0.6931 I: mean absolute error: 0.0985
I: Classification performance AUROC: 0.8204
| stage | protein | # probes | motif length | r | AUROC | G0 | G0 fitted | ratio | max binding | min binding | energies | model | logo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | best repetition | T7 primase | 3149 | 4 | 0.706996 | 0.829541 | -9177.028204 | True | 7.291495 | 0.972033 | 0.133311 | -7053,.. | suppressed | |
| 1 | mae optimzed | T7 primase | 3149 | 4 | 0.693057 | 0.820350 | -9177.028204 | True | 7.291495 | 0.978392 | 0.134989 | -6638,.. | suppressed |
### Based on the motif of CORE_MOTIF_LENGTH analyze the neigbouring positions
### whether their inclusion can improve the quality of the motif
df_positions=model_best_repetition.investigate_extension_parallel(X, y, end5=3, end3=3, nuc_type=NUC_TYPE)
list_positions=df_positions.index[df_positions['+2%']].tolist()+[0] # list of positions with an increase of2% and default position 0
ext5=-min(list_positions)
ext3=max(list_positions)
print("I: It is suggested to extend the core motif at the 5' end by %i and at the 3' end by %i positions." %(ext5, ext3))
0%| | 0/6 [00:00<?, ?engine/s]
job5: 0%| | 0/3 [00:00<?, ?tasks/s]
job3: 0%| | 0/3 [00:00<?, ?tasks/s]
I: Optimization took 0.02 hours.
I: It is suggested to extend the core motif at the 5' end by 0 and at the 3' end by 0 positions.
### fit & predict optimization starting with extended energy matrix if extension appears to improve prediction
if ext5+ext3!=0: #extension suggestion from previous analysis of the bordering positions
expanded_energies=model_best_repetition.energies_
# append energies of single-optimized bordering positions to energies of central part
if ext5!=0:
energies_5=np.concatenate(df_positions['energies'][(df_positions.index<0) & (df_positions.index>=-ext5)].to_numpy())
expanded_energies=np.concatenate((energies_5, expanded_energies))
if ext3!=0:
energies_3=np.concatenate(df_positions['energies'][(df_positions.index<=ext3) & (df_positions.index>0)].to_numpy().flatten())
expanded_energies=np.concatenate((expanded_energies, energies_3))
mf.energies2logo(expanded_energies, nuc_type=NUC_TYPE)
print('I: Optimization started with following extended motif.')
expanded_motif_length=len(expanded_energies)//4
model_extended=mf.findmotif(motif_length=expanded_motif_length, protein_conc=PROT_CONC, both_strands=BOTH_STRANDS,
start=expanded_energies, fit_G0=True)
start = time()
model_extended.fit(X,y)
print("Optimization took %.2f hours." % ((time() - start)/3600))
# print & display main results
model_extended.analyse_motif(X,y, THRESHOLD, nuc_type=NUC_TYPE)
# store results and display stages
STAGES.append('extended', model_extended)
mf.display_df(STAGES.df, nuc_type=NUC_TYPE)
else:
print('I: Motif is not extended based on previous analysis.')
I: Motif is not extended based on previous analysis.
### fit & predict optimization starting with extended energy matrix plus one bordering position on each side if current bordering position exceed the information of 0.25
last_model=STAGES.df.at[max(STAGES.df.index),'model']
I_5=mf.energies2information(last_model.energies_[0:4])>=0.25 #sufficient information content of 5' end position
I_3=mf.energies2information(last_model.energies_[-4:])>=0.25 #sufficient information content of 3' end position
if I_5 or I_3:
print('I: At least one of the bordering positions of the current motif has an information content of at least 0.25. Extending.')
expanded_energies_with_border=mf.modify_energies(last_model.energies_, end5=I_5, end3=I_3)
mf.energies2logo(expanded_energies_with_border, nuc_type=NUC_TYPE)
motif_length_with_border=len(expanded_energies_with_border)//4
model_with_border=mf.findmotif(motif_length=motif_length_with_border, protein_conc=PROT_CONC, both_strands=BOTH_STRANDS,
start=expanded_energies_with_border, fit_G0=True)
start = time()
model_with_border.fit(X,y)
print("Optimization took %.2f hours." % ((time() - start)/3600))
# print & display main results
model_with_border.analyse_motif(X,y, THRESHOLD, nuc_type=NUC_TYPE)
# store results and display stages
STAGES.append('expanded, border', model_with_border)
mf.display_df(STAGES.df, nuc_type=NUC_TYPE)
else:
print('I: Both bordering positions of the found motif have an information content below 0.25. No futher optimization required.')
I: At least one of the bordering positions of the current motif has an information content of at least 0.25. Extending.
Optimization took 0.23 hours. I: energy matrix and logos:
A C G T
0 1225 -591 -397 -236
1 -7152 9225 -8511 6438
2 5548 -4640 5593 -6501
3 428 -2416 2263 -275
4 -937 900 1026 -989
I: summed absolute energies of each position:
0 2451
1 31326
2 22284
3 5383
4 3854
dtype: int64
I: averaged summed energy over all positions: 13060
I: Mean and Standard Deviation for the Free Energy G to all subsequences of all probes: -8708 +/- 7835
I: Plot of the Occupancy of a subsite as the function of the Free Energy G
overlaid with the distribution of the Free Energy of all subsites.
I: There shall be only a small overlap of both curves. i.e. only the most negative Free Energies
lead to a measurable occupancy.
I: Calculated occupancy over all subsite of a single probe: binding: 0.07823 .. 0.97513 (ratio: 12.5) I: number of probes: 3149 I: Pearson Correlation r: 0.7229 I: mean absolute error: 0.1000
I: Classification performance AUROC: 0.8395
| stage | protein | # probes | motif length | r | AUROC | G0 | G0 fitted | ratio | max binding | min binding | energies | model | logo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | best repetition | T7 primase | 3149 | 4 | 0.706996 | 0.829541 | -9177.028204 | True | 7.291495 | 0.972033 | 0.133311 | -7053,.. | suppressed | |
| 1 | mae optimzed | T7 primase | 3149 | 4 | 0.693057 | 0.820350 | -9177.028204 | True | 7.291495 | 0.978392 | 0.134989 | -6638,.. | suppressed | |
| 2 | expanded, border | T7 primase | 3149 | 5 | 0.722890 | 0.839472 | -8721.403785 | True | 12.464345 | 0.975132 | 0.078234 | 1225,.. | suppressed |
last_model=STAGES.df.at[max(STAGES.df.index),'model']
df_relevant_positions=last_model.explore_positions(X, y)
list_positions=df_relevant_positions.index[df_relevant_positions['-2%']].tolist() # list of positions with an increase of2% and default position 0
start_relevant=min(list_positions)
end_relevant=max(list_positions)
red5=-start_relevant
red3=end_relevant-len(df_relevant_positions)+1
print('I: The analysis suggests, that positions between %i to %i contribute significantly to the motif' %(start_relevant, end_relevant))
last_model=STAGES.df.at[max(STAGES.df.index),'model']
if (end_relevant-start_relevant+1)in STAGES.df['motif length'].to_list():
print('I: No need for a further optimization. An optimization with motif length of %i has already been done.' %(end_relevant-start_relevant+1))
print('I: Checking whether G0 has been chosen correctly.')
last_model.investigate_G0(X, y)
else:
print('I: Bordering positions only marginally contributing towards regression quality are dropped.')
print('I: New start energy for motif optimization:')
start_final_model=mf.modify_energies(last_model.energies_, end5=red5, end3=red3)
mf.energies2logo(start_final_model, nuc_type=NUC_TYPE)
final_model=mf.findmotif(motif_length=len(start_final_model)//4, protein_conc=PROT_CONC,
both_strands=BOTH_STRANDS, start=start_final_model, fit_G0=True)
start = time()
final_model.fit(X,y)
print("Optimization took %.2f hours." % ((time() - start)/3600))
# print & display main results
final_model.analyse_motif(X,y, THRESHOLD, nuc_type=NUC_TYPE)
print('I: Checking whether G0 has been chosen correctly.')
final_model.investigate_G0(X, y)
# store results and display stages
STAGES.append('shrinked', final_model)
mf.display_df(STAGES.df, nuc_type=NUC_TYPE)
I: The analysis suggests, that positions between 0 to 4 contribute significantly to the motif I: No need for a further optimization. An optimization with motif length of 5 has already been done. I: Checking whether G0 has been chosen correctly. I: Current G0 = -8721 J/mol (see red broken line in figure below) with r = 0.723. I: Maximal r is 0.724 at G0=-6721 J/mol (see green broken line below). I: Maximal occupancy of 2 is reached at G0=-10721 J/mol (see blue broken line below). I: Maximal occupancy of 0.2 is reached at G0=-4721 J/mol (see blue broken line below).
I: G0 is in a range leading to maximal probe occupancy between 0.2 and 2. Good. I: Maximal r is close to r achieved with current G0. Good.
# refit model to minimize mae
last_model=STAGES.df.at[max(STAGES.df.index),'model']
refitted_model=last_model.refit_mae(X,y)
# print & display main results
refitted_model.analyse_motif(X,y, THRESHOLD, nuc_type=NUC_TYPE)
# store results and display stages
STAGES.append('final mae optimzed', refitted_model)
mf.display_df(STAGES.df, nuc_type=NUC_TYPE)
Optimization terminated successfully.
Current function value: 0.096293
Iterations: 11
Function evaluations: 2870
I: energy matrix and logos:
A C G T
0 1245 -457 -285 -502
1 -6968 12092 -8413 3288
2 5629 -4538 5498 -6588
3 -240 -2118 2580 -221
4 -608 943 840 -1175
I: summed absolute energies of each position:
0 2491
1 30762
2 22254
3 5161
4 3567
dtype: int64
I: averaged summed energy over all positions: 12847
I: Mean and Standard Deviation for the Free Energy G to all subsequences of all probes: -8674 +/- 8388
I: Plot of the Occupancy of a subsite as the function of the Free Energy G
overlaid with the distribution of the Free Energy of all subsites.
I: There shall be only a small overlap of both curves. i.e. only the most negative Free Energies
lead to a measurable occupancy.
I: Calculated occupancy over all subsite of a single probe: binding: 0.07442 .. 0.88433 (ratio: 12.5) I: number of probes: 3149 I: Pearson Correlation r: 0.7096 I: mean absolute error: 0.0963
I: Classification performance AUROC: 0.8235
| stage | protein | # probes | motif length | r | AUROC | G0 | G0 fitted | ratio | max binding | min binding | energies | model | logo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | best repetition | T7 primase | 3149 | 4 | 0.706996 | 0.829541 | -9177.028204 | True | 7.291495 | 0.972033 | 0.133311 | -7053,.. | suppressed | |
| 1 | mae optimzed | T7 primase | 3149 | 4 | 0.693057 | 0.820350 | -9177.028204 | True | 7.291495 | 0.978392 | 0.134989 | -6638,.. | suppressed | |
| 2 | expanded, border | T7 primase | 3149 | 5 | 0.722890 | 0.839472 | -8721.403785 | True | 12.464345 | 0.975132 | 0.078234 | 1225,.. | suppressed | |
| 3 | final mae optimzed | T7 primase | 3149 | 5 | 0.709649 | 0.823475 | -8721.403785 | True | 12.464345 | 0.884329 | 0.074416 | 1245,.. | suppressed |
STAGES.df['energies'].apply(mf.energies2information)
0 2.362101 1 2.285369 2 2.500981 3 2.437509 Name: energies, dtype: float64
STAGES.df.to_json('%s_%s-%s-%s_%s-%s.json' %(PROTEIN_NAME, datetime.now().year, datetime.now().month,datetime.now().day , datetime.now().hour, datetime.now().minute))